This note project folder demonstrate the basics of spatial data analysis with R.
library(tidyverse)
library(ggthemes)
theme_set(theme_map())
library(sf)
sf_use_s2(FALSE)
library(rnaturalearth)
library(rnaturalearthdata)
Introducing a new format of geo-referenced data.
world = ne_countries(scale = "medium", type = "map_units", returnclass = "sf")
world |> ggplot() + geom_sf()
names(world)
## [1] "scalerank" "featurecla" "labelrank" "sovereignt" "sov_a3"
## [6] "adm0_dif" "level" "type" "admin" "adm0_a3"
## [11] "geou_dif" "geounit" "gu_a3" "su_dif" "subunit"
## [16] "su_a3" "brk_diff" "name" "name_long" "brk_a3"
## [21] "brk_name" "brk_group" "abbrev" "postal" "formal_en"
## [26] "formal_fr" "note_adm0" "note_brk" "name_sort" "name_alt"
## [31] "mapcolor7" "mapcolor8" "mapcolor9" "mapcolor13" "pop_est"
## [36] "gdp_md_est" "pop_year" "lastcensus" "gdp_year" "economy"
## [41] "income_grp" "wikipedia" "fips_10" "iso_a2" "iso_a3"
## [46] "iso_n3" "un_a3" "wb_a2" "wb_a3" "woe_id"
## [51] "adm0_a3_is" "adm0_a3_us" "adm0_a3_un" "adm0_a3_wb" "continent"
## [56] "region_un" "subregion" "region_wb" "name_len" "long_len"
## [61] "abbrev_len" "tiny" "homepart" "geometry"
download.file(
"https://ucdp.uu.se/downloads/ucdpprio/ucdp-prio-acd-221-csv.zip",
destfile = "data/ucdp-prio-acd-221-csv.zip")
unzip("Lec_09/data/ucdp-prio-acd-221-csv.zip", exdir = "Lec_09/data")
# For more information: https://ucdp.uu.se/downloads/index.html#armedconflict
d = read_csv("Lec_09/data/ucdp-prio-acd-221.csv")
Extract country names and find the ISO3c codes using the package
countrycode. This is a “lazy” approach. The most robust
approach will be a locally customized dataset of country names.
library(countrycode)
# Take a look at the data
# Variables of interest: side_a, side_b
d |>
select(year, side_a, side_b)
## # A tibble: 2,568 × 3
## year side_a side_b
## <dbl> <chr> <chr>
## 1 2012 Government of India GNLA
## 2 2014 Government of India GNLA
## 3 1967 Government of Egypt Government of Israel
## 4 1969 Government of Egypt Government of Israel
## 5 1970 Government of Egypt Government of Israel
## 6 1973 Government of Egypt Government of Israel
## 7 2011 Government of Sudan Republic of South Sudan
## 8 2011 Government of South Sudan SSDM/A, SSLM/A
## 9 2012 Government of South Sudan SSLM/A
## 10 2013 Government of South Sudan SPLM/A - IO, SSDM/A - Cobra Faction
## # ℹ 2,558 more rows
d_t = d |>
mutate(
side_a_country = countrycode(side_a, "country.name", "country.name"),
side_a_iso3c = countrycode(side_a, "country.name", "iso3c"),
.after = "side_a"
) |>
mutate(
side_b_country = countrycode(side_b, "country.name", "country.name"),
side_b_iso3c = countrycode(side_b, "country.name", "iso3c"),
.after = "side_b"
)
# Need to check manually if the conversion is successful.
d_t |>
select(year, side_a, side_a_country, side_a_iso3c, side_b, side_b_country, side_b_iso3c)
## # A tibble: 2,568 × 7
## year side_a side_a_country side_a_iso3c side_b side_b_country side_b_iso3c
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2012 Governm… India IND GNLA <NA> <NA>
## 2 2014 Governm… India IND GNLA <NA> <NA>
## 3 1967 Governm… Egypt EGY Gover… Israel ISR
## 4 1969 Governm… Egypt EGY Gover… Israel ISR
## 5 1970 Governm… Egypt EGY Gover… Israel ISR
## 6 1973 Governm… Egypt EGY Gover… Israel ISR
## 7 2011 Governm… Sudan SDN Repub… South Sudan SSD
## 8 2011 Governm… South Sudan SSD SSDM/… <NA> <NA>
## 9 2012 Governm… South Sudan SSD SSLM/A <NA> <NA>
## 10 2013 Governm… South Sudan SSD SPLM/… <NA> <NA>
## # ℹ 2,558 more rows
# It looks already. Then filter only the country-country conflicts
d_t_s = d_t |>
filter(!is.na(side_a_country) & !is.na(side_b_country))
# OK. I don't think this dataset is entirely clean yet.
# If this is a real research project, you need to do more manual check.
# But for demonstration purpose, let's use it as it is.
To draw the network, we need points representing countries’
locations). Using the map we already halv, we find the geographic
“centroids” for each country using functions from the sf
package.
# Calculate the countries' "centroids"
country_centroid = st_centroid(world$geometry)
# Convert the centroids to a dataframe
country_centroid_df = country_centroid |>
st_coordinates() |>
as_tibble() |>
rename("centroid_xcoord" = "X", "centroid_ycoord" = "Y")
world_t = world |>
bind_cols(country_centroid_df)
To check whether the geogrphic centroids are correctly specified we plot a world map along with the points of centroids. Check if the maps make sense and what strike you as interesting/ unintuitive.
world_t |>
ggplot() +
geom_sf() +
geom_point(aes(x = centroid_xcoord, y = centroid_ycoord))
Now we have the locations of each country (using their geographic centroids). Merge them with the conflict dataset
# Get a dataset with only countries' identifiers and locations of their centroids
world_t_centroid = world_t |>
as_tibble() |>
select(iso_a3, centroid_xcoord, centroid_ycoord) |>
filter(!is.na(iso_a3)) # There are few entries with no country identifiers. We do not need them.
d_t_s_2 = d_t_s |>
left_join(world_t_centroid, by = c("side_a_iso3c" = "iso_a3")) |>
rename("centroid_xcoord_a" = "centroid_xcoord", "centroid_ycoord_a" = "centroid_ycoord") |>
left_join(world_t_centroid, by = c("side_b_iso3c"= "iso_a3")) |>
rename("centroid_xcoord_b" = "centroid_xcoord", "centroid_ycoord_b" = "centroid_ycoord")
names(d_t_s_2)
## [1] "conflict_id" "location" "side_a"
## [4] "side_a_country" "side_a_iso3c" "side_a_id"
## [7] "side_a_2nd" "side_b" "side_b_country"
## [10] "side_b_iso3c" "side_b_id" "side_b_2nd"
## [13] "incompatibility" "territory_name" "year"
## [16] "intensity_level" "cumulative_intensity" "type_of_conflict"
## [19] "start_date" "start_prec" "start_date2"
## [22] "start_prec2" "ep_end" "ep_end_date"
## [25] "ep_end_prec" "gwno_a" "gwno_a_2nd"
## [28] "gwno_b" "gwno_b_2nd" "gwno_loc"
## [31] "region" "version" "centroid_xcoord_a"
## [34] "centroid_ycoord_a" "centroid_xcoord_b" "centroid_ycoord_b"
# ggplot() +
# geom_sf(data = world_t) +
# geom_point(data = d_t_s_2, aes(x = centroid_xcoord_a, y = centroid_ycoord_a)) +
# geom_point(data = d_t_s_2, aes(x = centroid_xcoord_b, y = centroid_ycoord_b)) +
# geom_curve(data = d_t_s_2, aes(x = centroid_xcoord_a, y = centroid_ycoord_a,
# xend = centroid_xcoord_a, yend = centroid_ycoord_b))
# ERROR above!
# Remove countries to self
d_t_s_3 = d_t_s_2 |>
filter(!(side_a_iso3c == side_b_iso3c))
ggplot() +
geom_sf(data = world_t) +
geom_point(data = d_t_s_3, aes(x = centroid_xcoord_a, y = centroid_ycoord_a)) +
geom_point(data = d_t_s_3, aes(x = centroid_xcoord_b, y = centroid_ycoord_b)) +
geom_curve(data = d_t_s_3, aes(x = centroid_xcoord_a,
y = centroid_ycoord_a,
xend = centroid_xcoord_b,
yend = centroid_ycoord_b),
curvature = 0.2, color = "red", alpah = 0.5)
ggplot() +
geom_sf(data = world_t) +
geom_text(data = d_t_s_3, aes(x = centroid_xcoord_a, y = centroid_ycoord_a, label = side_a_iso3c)) +
geom_text(data = d_t_s_3, aes(x = centroid_xcoord_b, y = centroid_ycoord_b, label = side_b_iso3c)) +
geom_curve(data = d_t_s_3, aes(x = centroid_xcoord_a,
y = centroid_ycoord_a,
xend = centroid_xcoord_b,
yend = centroid_ycoord_b), curvature = 0.2, color = "red", alpah = 0.5)
d_t_s_4 = d_t_s_3 |>
group_by(side_a_iso3c, side_b_iso3c,
centroid_xcoord_a, centroid_ycoord_a,
centroid_xcoord_b, centroid_ycoord_b) |>
count()
ggplot() +
geom_sf(data = world_t) +
geom_point(data = d_t_s_4, aes(x = centroid_xcoord_a, y = centroid_ycoord_a)) +
geom_point(data = d_t_s_4, aes(x = centroid_xcoord_b, y = centroid_ycoord_b)) +
geom_curve(data = d_t_s_4, aes(x = centroid_xcoord_a,
y = centroid_ycoord_a,
xend = centroid_xcoord_b,
yend = centroid_ycoord_b,
linewidth = n),
curvature = 0.2, color = "red", alpha = 0.5)
ggplot() +
geom_sf(data = world_t) +
geom_text(data = d_t_s_3, aes(x = centroid_xcoord_a, y = centroid_ycoord_a, label = side_a_iso3c)) +
geom_text(data = d_t_s_3, aes(x = centroid_xcoord_b, y = centroid_ycoord_b, label = side_b_iso3c)) +
geom_curve(data = d_t_s_4, aes(x = centroid_xcoord_a,
y = centroid_ycoord_a,
xend = centroid_xcoord_b,
yend = centroid_ycoord_b,
linewidth = n),
curvature = 0.2, color = "red", alpha = 0.5)
# Q: Why does the text look weird?